About The Data set

Sales & inventory data for a fictitious chain of toy stores in Mexico called Maven Toys, including information about products, stores, daily sales transactions, and current inventory levels at each location

Top Categories by Total Revenue

# Top category by sales
# Join sales and products
sales_products <- left_join(sales, products, by = "Product_ID")
# Calculate total sales by category
by_category <- sales_products |>
  group_by(Product_Category) |>
  summarise(Total_Sales = sum(Units * Product_Price)) |>
  arrange(desc(Total_Sales))
# Create bar chart with ggplot2
p <- ggplot(by_category, aes(x = fct_reorder(Product_Category, Total_Sales), y = Total_Sales, fill = Product_Category, text = paste(Product_Category, "Total Sale: ", scales::dollar(Total_Sales)))) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = scales::dollar(Total_Sales), y = Total_Sales), vjust = -0.5, color = "black", size = 3) +
  labs(title = "Top Ten Categories by Sales", x = "Product Category", y = "Sales") +
  scale_y_continuous(labels = scales::dollar_format()) + 
  theme(legend.position = "none")
# Convert to Plotly
p_plotly <- ggplotly(p, tooltip = "text")
p_plotly

Explanation of the Chart

The chart shows how much revenue each product category made. Toys made the most revenue, more than $5.09 million. Sports & Outdoors products made 2.17 million revenue, which is the least of all.

Total Cost by Each Category

# Cost of category
total_cost <- sales_products |>
  group_by(Product_Category) |>
  summarise(Cost = sum(Product_Cost))
viz <- ggplot(total_cost, aes(x = reorder(Product_Category, Cost), y = Cost, fill = Product_Category, text = paste("Toatl Cost: ", scales::dollar(Cost))))+
  geom_bar(stat = "identity")+
  geom_text(aes(label = scales::dollar(Cost), y = Cost), vjust = -0.5, color = "black", size = 3) +
  labs(title = "Total Cost by Each Category", y= "Cost", x= "Category")+
  scale_y_continuous(labels = scales::dollar_format())+
  theme_minimal()+
  theme(legend.position = "none")
cost_viz <- ggplotly(viz, tooltip = "text")
cost_viz

Explanation

  • We spent the most money on Toys, about $3 million more than any other product category.
  • We spent the least money on Electronics, less than $1 million.
  • We should look for ways to improve our sales of Electronics, as they have a low cost and a high potential market.

Most Profitable Category

sales_products <- sales_products |>
  mutate(Profit = (Product_Price - Product_Cost) * Units)
total_profit <- sales_products |>
  group_by(Product_Category) |>
  summarise(Profits = sum(Profit))
viz <- ggplot(total_profit, aes(x = reorder(Product_Category, Profits), y = Profits, fill = Product_Category, text = paste("Total Profit: ", scales::dollar(Profits)))) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = scales::dollar(Profits), y = Profits), vjust = -0.5, color = "black", size = 3) +
  labs(title = "Total Profit by Each Category", y= "Profit", x= "Category") +
  scale_y_continuous(labels = scales::dollar_format()) +
  theme_minimal() +
  theme(legend.position = "none")
profit_viz <- ggplotly(viz, tooltip = "text")
profit_viz

Explanation

  • Toys and Electronics are our most profitable products. We made over $1 million from each of them.
  • Sports & Outdoors, Games, and Art & Crafts are our least profitable products. We made less than $800k from each of them.
  • We should sell more Toys and Electronics to earn more money. We should also improve our sales of the other products to make them more profitable.

Sale Analysis Based on Location

# Summarize sales by location type 
sales_stores <- sales_stores |>
  mutate(Profit = (Product_Price - Product_Cost) * Units)
sales_by_location <- sales_stores %>%
  group_by(Store_Location) %>%
  summarize(Total_Sales = sum(Units * Product_Price),
            Total_Costs = sum(Units * Product_Cost),
            Total_Profits = sum(Profit)) 
# Calculate profit margin
sales_by_location <- sales_by_location %>%
  mutate(Profit_Margin = Total_Profits / Total_Sales) 
# Melt the data for better visualization
sales_melted <- sales_by_location %>%
  pivot_longer(cols = c(Total_Sales, Total_Costs, Total_Profits, Profit_Margin), names_to = "Metric", values_to = "Value") 
# Specify the order of the levels for the "Metric" variable
metric_order <- c("Total_Profits", "Total_Costs", "Total_Sales", "Profit_Margin")  # Adjusted order
sales_melted$Metric <- factor(sales_melted$Metric, levels = metric_order) 
# Plotting with ggplot2
sales_plot <- ggplot(sales_melted, aes(x = Store_Location, y = Value, fill = Metric, text = paste(Metric, ": ", ifelse(Metric == "Profit_Margin", scales::percent(Value), scales::dollar(Value))))) +
  geom_bar(stat = "identity", position = "stack", color = "white") +
  labs(title = "Sales Analysis By Location Type", y = "Amount", x = "Store Location") +
  scale_fill_manual(values = c("#8da0cb", "#fc8d62", "#66c2a5", "#e78ac3"), name = "Metric") +
  theme_minimal() +  
  geom_text(position = position_stack(vjust = 0.5), aes(label = ifelse(Metric == "Profit_Margin", scales::percent(Value), scales::dollar(Value))), color = "black", size=3) 
interactive_plot <- ggplotly(sales_plot, tooltip = "text") 
interactive_plot

Explanation

Downtown leads in both total sales, reaching $8.21M, and profits, totaling about $2.25M, highlighting its strong performance. Commercial follows with $3.2M in sales and $927K in profits. However, the Airport lags behind with the lowest sales at $1.3M. To maximize profits, focus marketing efforts on Downtown, assess Commercial’s cost structure, and evaluate the performance of the struggling Airport location for potential improvements or closure. Additionally, aim to enhance profitability margins; Downtown and Commercial boast 27.36% and 28.27% respectively, while Airport and Residential show room for improvement at 29.31% and 27.80%.

Top 10 Stores By Revenue

# Top 10 Stores By Total Sales
sales_by_store <- sales_stores |>
  group_by(Store_Name) |>
  summarise(totalsales = sum(Product_Price * Units)) |>
  arrange(desc(totalsales)) |>
  slice_head(n = 10)
# Visualization
plot_sales_by_store <- ggplot(sales_by_store, aes(x =fct_reorder(Store_Name, totalsales), y=totalsales, fill = Store_Name, text=paste("Total Sales: ", scales::dollar(totalsales)))) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = scales::dollar(totalsales), y = totalsales), vjust = -0.5, color = "black", size = 3) +
  labs(title = "Top Ten Stores by Sales", x = "Store Name", y = "Sales") +
  scale_y_continuous(labels = scales::dollar_format()) +
  theme_minimal() +
  theme(legend.position = "none")+
  coord_flip()
# Interactive plot
interactive_visuales <- ggplotly(plot_sales_by_store, tooltip = "text")
interactive_visuales

Explanation

The chart shows which store is generation the most revenue.

Average of Units Sold of Each Product each day

# Daily Average Units Solds per Product
# Extract date 
sales_products <- sales_products |>
  mutate(Date = as.Date(Date))
# Units sold per product per day
daily_sales <- sales_products |> 
  group_by(Product_ID, Date) |>
  summarise(Units = sum(Units))
# Average daily units sold
avg_daily <- daily_sales |>
  group_by(Product_ID) |>
  summarise(Avg_Daily_Units = mean(Units))
# Plotting the average daily units sold
avg_daily_plot <- ggplot(avg_daily, aes(x = reorder(Product_ID, Avg_Daily_Units), y = Avg_Daily_Units, text=paste("Product ID: ", Product_ID," Sold ", round(Avg_Daily_Units,1), " Units Daily"))) +
  geom_point(size = 3, color = "yellow") +
  geom_text(aes(label = scales::number(round(Avg_Daily_Units, 1))), vjust = -0.5, size = 3) +
  labs(title = "Average Daily Units Sold per Product", x = "Product ID", y = "Average Daily Units Sold") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
interactive_avg_daily_plot <- ggplotly(avg_daily_plot, tooltip = "text")
interactive_avg_daily_plot

Explanation

The chart demonstrate the average number of units of each product sold per day * Product_ID = 3 is the top sold product with 217.7 units sold daily on average * And Product_ID 20 and 35 are the at the bottom.

Daily Total Sales/Revenue generated

# Daily Total Sales
# Extract date
sales_stores <- sales_stores |>
  mutate(Date = as.Date(Date))
# Daily sales 
daily_sales <- sales_stores |>
  group_by(Date) |>
  summarize(Daily_Sales = sum(Units * Product_Price))
# Plotting daily sales
daily_sales_plot <- ggplot(daily_sales, aes(x = Date, y = Daily_Sales, text = paste("Total Sale on ",Date, "Was", scales::dollar(Daily_Sales)))) +
  geom_line(aes(group = 1), color = "blue", size = 2) +  # Include the group parameter
  geom_point(color = "black", size = 2) +
  labs(title = "Daily Sales Over Time", x = "Date", y = "Daily Sales") +
  theme_minimal()
interactive_chart <- ggplotly(daily_sales_plot, tooltip = "text")
interactive_chart

Explanation

  • There is a spike in sales on the 30th of April each year

Week Days Sales Trend

# Week_Days Sales Trends 
# Extract weekday
sales_stores <- sales_stores |>
  mutate(Weekday = wday(Date, label=TRUE))
# Sales by weekday
sales_by_wkday <- sales_stores |>
  group_by(Weekday) |>
  summarize(Sales = sum(Units*Product_Price))
# Plotting sales by weekday
sales_by_wkday_plot <- ggplot(sales_by_wkday, aes(x = Weekday, y = Sales, text=paste("Total Sale on: ", Weekday, "is ", scales::dollar(Sales)))) +
  geom_bar(stat = "identity", fill="skyblue") +
  labs(title = "Sales by Weekday", x = "Weekday", y = "Sales") +
  scale_y_continuous(labels = scales::dollar_format()) +
  scale_x_discrete(labels = c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")) +  # Set weekday labels
  theme_minimal()
Viz_sales_by_wkday_plot <- ggplotly(sales_by_wkday_plot, tooltip = "text")
Viz_sales_by_wkday_plot

Top 10 Products by Unit Solds

###Top 10 Products by Sale###
# Group by Product_ID and summarize sales
prod_sales <- sales_products |>
  group_by(Product_ID) |>
  summarise(Unit_Solds = sum(Units))
# Arrange descending by sales 
prod_sales <- prod_sales |>
  arrange(desc(Unit_Solds))
# Take the top 10 rows
top_10 <- head(prod_sales, 10)
# Plotting top products by sales
top_10_plot <- ggplot(top_10, aes(x = reorder(Product_ID, Unit_Solds), y = Unit_Solds, text= paste("Unit Solds: ", Unit_Solds))) +
  geom_bar(stat = "identity", fill="#e60049") +
  labs(title = "Top 10 Products by Unit Solds", x = "Total Sales", y = "Product ID") +
  theme_minimal()
top_10_plot_viz <- ggplotly(top_10_plot, tooltip = "text")
top_10_plot_viz

Top 10 Products by Profit

# Most profitable products
sales_products <- sales_products |>
  mutate(Profit = (Product_Price - Product_Cost) * Units)
product_profits <- sales_products |>
  group_by(Product_ID) |>
  summarize(Total_Profit = sum(Profit))
product_profits <- arrange(product_profits, desc(Total_Profit))
Top_Products <- head(product_profits, 10)
# Interactive Viz for the top 10 products
profit_viz <- ggplot(Top_Products, aes(x = Total_Profit, y = reorder(Product_ID, -Total_Profit), text = paste("Total Profit: ", scales::dollar(Total_Profit)))) +
  geom_bar(stat = "identity", fill = "skyblue") +
  labs(title = "Top 10 Most Profitable Products", x = "Total Profit", y = "Product ID") +
  scale_x_continuous(labels = scales::dollar_format()) +  # Format x-axis labels as dollars
  theme_minimal() +
  theme(legend.position = "none") +
  coord_flip()
p_plotly <- ggplotly(profit_viz, tooltip = "text")
p_plotly

Monthly Sales Trend

# How do sales fluctuate over the course of a year?
# Extract month and year from date 
sales_stores <- sales_stores |>
  mutate(Month = lubridate::month(Date), 
         Year = lubridate::year(Date))
# Aggregate sales by month and year  
monthly_sales <- sales_stores |>
  group_by(Year, Month) |>
  summarise(Sales = sum(Units * Product_Price))
# Plotting monthly sales over the course of a year using a bar chart
monthly_sales_bar_plot <- ggplot(monthly_sales, aes(x = interaction(Year, Month, drop = TRUE), y = Sales, fill =as.factor(Year), text=paste("Total Sales in Month", Month," of Year",Year ,"is", scales::dollar(Sales)))) +
  geom_bar(stat = "identity") +
  labs(title = "Monthly Sales Over the Year", x = "Month",  y = "Sales") +
  scale_x_discrete(labels = scales::date_format("%b %Y")) +  # Format labels as Month Year
  scale_fill_viridis_d() +  # Using a color scale for years
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
monthly_sales_plot_viz <- ggplotly(monthly_sales_bar_plot, tooltip = "text")
monthly_sales_plot_viz

Top 5 Stores By Profit

# Top/Bottom stores by Profit
sales_stores <- sales_stores |>
  mutate(Profit = (Product_Price - Product_Cost) * Units)
store_profits <- sales_stores |> 
  group_by(Store_ID) |>
  summarize(Total_Profit = sum(Profit))
store_profits <- arrange(store_profits, desc(Total_Profit))
top_stores <- head(store_profits, 5)
bottom_stores <- tail(store_profits, 5)
# Create diverging bar chart for top stores
top_stores_chart <- ggplot(top_stores, aes(x = reorder(Store_ID, -Total_Profit), y = Total_Profit, fill= Store_ID, text=paste("Total Profit: ", scales::dollar(Total_Profit)))) +
  geom_bar(stat = "identity") +
  labs(title = "Top Stores by Profit", x = "Store ID", y = "Total Profit") +
  theme_minimal() +
  theme(legend.position = "none")
int_top_stores_chart <- ggplotly(top_stores_chart, tooltip = "text")
int_top_stores_chart

Bottom 5 Stores by Profit

#Bottom 10 Stores by Profit
# Create diverging bar chart for bottom stores
bottom_stores_chart <- ggplot(bottom_stores, aes(x = reorder(Store_ID, Total_Profit), y = Total_Profit, fill=Store_ID, text=paste("Total Profit: ", scales::dollar(Total_Profit)))) +
  geom_bar(stat = "identity", color = "black") +
  labs(title = "Bottom Stores by Profit", x = "Store ID", y = "Total Profit") +
  theme_minimal() +
  theme(legend.position = "none") 
int_bottom_stores_chart <- ggplotly(bottom_stores_chart, tooltip = "text")
int_bottom_stores_chart